from keras.layers import Bidirectional, Concatenate, Permute, Dot, Input, LSTM, Multiply
from keras.layers import RepeatVector, Dense, Activation, Lambda
from keras.optimizers import Adam
from keras.utils import to_categorical
from keras.models import load_model, Model
import keras.backend as K
import numpy as np

from faker import Faker
import random
from tqdm import tqdm
from babel.dates import format_date
from nmt_utils import *
import matplotlib.pyplot as plt
%matplotlib inline


# Number of (human-readable, machine-readable) date pairs to generate.
m = 10000
# load_dataset is not defined in this file; presumably it comes from the
# `from nmt_utils import *` star import. It yields the list of date pairs and
# the three vocabulary mappings unpacked here.
dataset, human_vocab, machine_vocab, inv_machine_vocab = load_dataset(m)

100%|██████████| 10000/10000 [00:00<00:00, 17940.17it/s]


# Display the first ten (human-readable, machine-readable) date pairs.
dataset[:10]

[('thursday june 26 2003', '2003-06-26'),
 ('18 sep 1994', '1994-09-18'),
 ('4 january 1983', '1983-01-04'),
 ('wednesday february 13 1991', '1991-02-13'),
 ('21.08.03', '2003-08-21'),
 ('june 1 1984', '1984-06-01'),
 ('june 5 2017', '2017-06-05'),
 ('21 jan 2004', '2004-01-21'),
 ('april 4 1974', '1974-04-04'),
 ('sunday december 12 1976', '1976-12-12')]


# Fixed length of every encoded input sequence (second axis of X / Xoh).
Tx = 30
# Fixed length of every encoded output sequence; 10 matches "YYYY-MM-DD".
Ty = 10
# preprocess_data is not defined in this file (presumably from nmt_utils).
# X / Y hold integer indices, Xoh / Yoh the corresponding one-hot encodings.
X, Y, Xoh, Yoh = preprocess_data(dataset, human_vocab, machine_vocab, Tx, Ty)

# Sanity-check the array shapes produced by preprocessing.
print("X.shape:", X.shape)
print("Y.shape:", Y.shape)
print("Xoh.shape:", Xoh.shape)
print("Yoh.shape:", Yoh.shape)

X.shape: (10000, 30)
Y.shape: (10000, 10)
Xoh.shape: (10000, 30, 37)
Yoh.shape: (10000, 10, 11)


# Inspect one training example at every stage: raw strings, integer indices,
# and one-hot vectors.
index = 0
print("Source date:", dataset[index][0])
print("Target date:", dataset[index][1])
print()
print("Source after preprocessing (indices):", X[index])
print("Target after preprocessing (indices):", Y[index])
print()
print("Source after preprocessing (one-hot):", Xoh[index])
print("Target after preprocessing (one-hot):", Yoh[index])

Source date: thursday june 26 2003
Target date: 2003-06-26

Source after preprocessing (indices): [30 20 31 28 29 16 13 34  0 22 31 25 17  0  5  9  0  5  3  3  6 36 36 36 36
 36 36 36 36 36]
Target after preprocessing (indices): [3 1 1 4 0 1 7 0 3 7]

Source after preprocessing (one-hot): [[ 0.  0.  0. ...,  0.  0.  0.]
 [ 0.  0.  0. ...,  0.  0.  0.]
 [ 0.  0.  0. ...,  0.  0.  0.]
 ..., 
 [ 0.  0.  0. ...,  0.  0.  1.]
 [ 0.  0.  0. ...,  0.  0.  1.]
 [ 0.  0.  0. ...,  0.  0.  1.]]
Target after preprocessing (one-hot): [[ 0.  0.  0.  1.  0.  0.  0.  0.  0.  0.  0.]
 [ 0.  1.  0.  0.  0.  0.  0.  0.  0.  0.  0.]
 [ 0.  1.  0.  0.  0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  1.  0.  0.  0.  0.  0.  0.]
 [ 1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.]
 [ 0.  1.  0.  0.  0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  0.  1.  0.  0.  0.]
 [ 1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  1.  0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  0.  1.  0.  0.  0.]]


# Shared layers are defined once as global variables so that every call to
# one_step_attention reuses the same layer objects (and therefore the same weights).
# Repeats its input Tx times along a new axis 1.
repeator = RepeatVector(Tx)
# Joins its inputs along the last (feature) axis.
concatenator = Concatenate(axis=-1)
# First scoring layer: 10 tanh units.
densor1 = Dense(10, activation = "tanh")
# Second scoring layer: a single ReLU unit, i.e. one energy per input position.
densor2 = Dense(1, activation = "relu")
# `softmax` is not imported by name in this file; presumably it is provided by the nmt_utils star import.
activator = Activation(softmax, name='attention_weights') # We are using a custom softmax(axis = 1) loaded in this notebook
# Dot product that contracts axis 1 of both inputs.
dotor = Dot(axes = 1)


# GRADED FUNCTION: one_step_attention

def one_step_attention(a, s_prev):
    """
    Run one attention step and return the resulting context vector.

    The previous decoder state is repeated across all Tx input positions, paired with
    the Bi-LSTM hidden state at each position, scored by the two shared dense layers,
    turned into attention weights by the shared activation, and finally used to take
    a weighted combination of the Bi-LSTM hidden states.

    Arguments:
    a -- hidden states of the Bi-LSTM, Keras tensor of shape (m, Tx, 2*n_a)
    s_prev -- previous hidden state of the (post-attention) LSTM, Keras tensor of shape (m, n_s)

    Returns:
    context -- context vector of shape (m, 1, 2*n_a), input of the next (post-attention) LSTM cell
    """

    ### START CODE HERE ###
    # (m, n_s) -> (m, Tx, n_s): one copy of the decoder state per input position.
    s_tiled = repeator(s_prev)
    # Pair each encoder state with the decoder state on the last axis.
    # For grading purposes 'a' is listed first and the repeated 's_prev' second.
    joined = concatenator([a, s_tiled])
    # Small fully-connected network: intermediate energies, then one energy per position.
    energies = densor2(densor1(joined))
    # Attention weights "alphas" over the Tx positions.
    alphas = activator(energies)
    ### END CODE HERE ###

    # Weighted combination of the encoder states using the attention weights.
    return dotor([alphas, a])


n_a = 32 # number of units for the pre-attention, bi-directional LSTM's hidden state 'a'
n_s = 64 # number of units for the post-attention LSTM's hidden state "s"

# Please note, this is the post attention LSTM cell.  
# For the purposes of passing the automatic grader
# please do not modify this global variable.  This will be corrected once the automatic grader is also updated.
# return_state = True makes the layer return its final hidden and cell states in addition to its output.
post_activation_LSTM_cell = LSTM(n_s, return_state = True) # post-attention LSTM 
# Shared output projection with one unit per machine-vocabulary entry.
# `softmax` is the same name used for the attention activation (presumably from nmt_utils).
output_layer = Dense(len(machine_vocab), activation=softmax)


# GRADED FUNCTION: model

def model(Tx, Ty, n_a, n_s, human_vocab_size, machine_vocab_size):
    """
    Build the attention-based sequence-to-sequence Keras model.

    A bidirectional LSTM encodes the one-hot input sequence. For each of the Ty output
    steps, one_step_attention produces a context vector from the encoder states and the
    current decoder hidden state; the shared post-attention LSTM consumes that context,
    and the shared output layer turns the new hidden state into that step's prediction.

    Arguments:
    Tx -- length of the input sequence
    Ty -- length of the output sequence
    n_a -- hidden state size of the Bi-LSTM
    n_s -- hidden state size of the post-attention LSTM
    human_vocab_size -- size of the python dictionary "human_vocab"
    machine_vocab_size -- size of the python dictionary "machine_vocab"
                          (not read here: the output width is fixed by the global output_layer)

    Returns:
    model -- Keras model instance with inputs [X, s0, c0] and a list of Ty outputs
    """

    # Model inputs: the one-hot source sequence of shape (Tx, human_vocab_size), plus the
    # initial hidden state s0 and cell state c0 of the decoder LSTM, each of shape (n_s,).
    X = Input(shape=(Tx, human_vocab_size))
    s0 = Input(shape=(n_s,), name='s0')
    c0 = Input(shape=(n_s,), name='c0')

    ### START CODE HERE ###

    # Step 1: pre-attention Bi-LSTM returning the hidden state at every input position.
    encoder = Bidirectional(LSTM(n_a, return_sequences=True))
    a = encoder(X)

    # Step 2: unroll the decoder for Ty steps, threading the LSTM states through the loop.
    hidden, cell = s0, c0
    outputs = []
    for _step in range(Ty):
        # Step 2.A: context vector for this step from the encoder states and current hidden state.
        context = one_step_attention(a, hidden)

        # Step 2.B: post-attention LSTM cell; it returns (output, hidden state, cell state)
        # and only the first and last are kept, exactly as in the unpacking below.
        hidden, _, cell = post_activation_LSTM_cell(context, initial_state=[hidden, cell])

        # Steps 2.C / 2.D: project the hidden state and collect this step's prediction.
        outputs.append(output_layer(hidden))

    # Step 3: model taking the three inputs and returning the list of per-step outputs.
    ### END CODE HERE ###

    return Model(inputs=[X, s0, c0], outputs=outputs)


# NOTE: this rebinds the name `model` from the builder function to the built
# Model instance, so the function cannot be called again without re-running
# the cell that defines it.
model = model(Tx, Ty, n_a, n_s, len(human_vocab), len(machine_vocab))


# Print the layer table; shared layers list one incoming connection per decoder step.
model.summary()

____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
====================================================================================================
input_1 (InputLayer)             (None, 30, 37)        0                                            
____________________________________________________________________________________________________
s0 (InputLayer)                  (None, 64)            0                                            
____________________________________________________________________________________________________
bidirectional_1 (Bidirectional)  (None, 30, 64)        17920       input_1[0][0]                    
____________________________________________________________________________________________________
repeat_vector_2 (RepeatVector)   (None, 30, 64)        0           s0[0][0]                         
                                                                   lstm_2[0][0]                     
                                                                   lstm_2[1][0]                     
                                                                   lstm_2[2][0]                     
                                                                   lstm_2[3][0]                     
                                                                   lstm_2[4][0]                     
                                                                   lstm_2[5][0]                     
                                                                   lstm_2[6][0]                     
                                                                   lstm_2[7][0]                     
                                                                   lstm_2[8][0]                     
____________________________________________________________________________________________________
concatenate_2 (Concatenate)      (None, 30, 128)       0           bidirectional_1[0][0]            
                                                                   repeat_vector_2[0][0]            
                                                                   bidirectional_1[0][0]            
                                                                   repeat_vector_2[1][0]            
                                                                   bidirectional_1[0][0]            
                                                                   repeat_vector_2[2][0]            
                                                                   bidirectional_1[0][0]            
                                                                   repeat_vector_2[3][0]            
                                                                   bidirectional_1[0][0]            
                                                                   repeat_vector_2[4][0]            
                                                                   bidirectional_1[0][0]            
                                                                   repeat_vector_2[5][0]            
                                                                   bidirectional_1[0][0]            
                                                                   repeat_vector_2[6][0]            
                                                                   bidirectional_1[0][0]            
                                                                   repeat_vector_2[7][0]            
                                                                   bidirectional_1[0][0]            
                                                                   repeat_vector_2[8][0]            
                                                                   bidirectional_1[0][0]            
                                                                   repeat_vector_2[9][0]            
____________________________________________________________________________________________________
dense_4 (Dense)                  (None, 30, 10)        1290        concatenate_2[0][0]              
                                                                   concatenate_2[1][0]              
                                                                   concatenate_2[2][0]              
                                                                   concatenate_2[3][0]              
                                                                   concatenate_2[4][0]              
                                                                   concatenate_2[5][0]              
                                                                   concatenate_2[6][0]              
                                                                   concatenate_2[7][0]              
                                                                   concatenate_2[8][0]              
                                                                   concatenate_2[9][0]              
____________________________________________________________________________________________________
dense_5 (Dense)                  (None, 30, 1)         11          dense_4[0][0]                    
                                                                   dense_4[1][0]                    
                                                                   dense_4[2][0]                    
                                                                   dense_4[3][0]                    
                                                                   dense_4[4][0]                    
                                                                   dense_4[5][0]                    
                                                                   dense_4[6][0]                    
                                                                   dense_4[7][0]                    
                                                                   dense_4[8][0]                    
                                                                   dense_4[9][0]                    
____________________________________________________________________________________________________
attention_weights (Activation)   (None, 30, 1)         0           dense_5[0][0]                    
                                                                   dense_5[1][0]                    
                                                                   dense_5[2][0]                    
                                                                   dense_5[3][0]                    
                                                                   dense_5[4][0]                    
                                                                   dense_5[5][0]                    
                                                                   dense_5[6][0]                    
                                                                   dense_5[7][0]                    
                                                                   dense_5[8][0]                    
                                                                   dense_5[9][0]                    
____________________________________________________________________________________________________
dot_2 (Dot)                      (None, 1, 64)         0           attention_weights[0][0]          
                                                                   bidirectional_1[0][0]            
                                                                   attention_weights[1][0]          
                                                                   bidirectional_1[0][0]            
                                                                   attention_weights[2][0]          
                                                                   bidirectional_1[0][0]            
                                                                   attention_weights[3][0]          
                                                                   bidirectional_1[0][0]            
                                                                   attention_weights[4][0]          
                                                                   bidirectional_1[0][0]            
                                                                   attention_weights[5][0]          
                                                                   bidirectional_1[0][0]            
                                                                   attention_weights[6][0]          
                                                                   bidirectional_1[0][0]            
                                                                   attention_weights[7][0]          
                                                                   bidirectional_1[0][0]            
                                                                   attention_weights[8][0]          
                                                                   bidirectional_1[0][0]            
                                                                   attention_weights[9][0]          
                                                                   bidirectional_1[0][0]            
____________________________________________________________________________________________________
c0 (InputLayer)                  (None, 64)            0                                            
____________________________________________________________________________________________________
lstm_2 (LSTM)                    [(None, 64), (None, 6 33024       dot_2[0][0]                      
                                                                   s0[0][0]                         
                                                                   c0[0][0]                         
                                                                   dot_2[1][0]                      
                                                                   lstm_2[0][0]                     
                                                                   lstm_2[0][2]                     
                                                                   dot_2[2][0]                      
                                                                   lstm_2[1][0]                     
                                                                   lstm_2[1][2]                     
                                                                   dot_2[3][0]                      
                                                                   lstm_2[2][0]                     
                                                                   lstm_2[2][2]                     
                                                                   dot_2[4][0]                      
                                                                   lstm_2[3][0]                     
                                                                   lstm_2[3][2]                     
                                                                   dot_2[5][0]                      
                                                                   lstm_2[4][0]                     
                                                                   lstm_2[4][2]                     
                                                                   dot_2[6][0]                      
                                                                   lstm_2[5][0]                     
                                                                   lstm_2[5][2]                     
                                                                   dot_2[7][0]                      
                                                                   lstm_2[6][0]                     
                                                                   lstm_2[6][2]                     
                                                                   dot_2[8][0]                      
                                                                   lstm_2[7][0]                     
                                                                   lstm_2[7][2]                     
                                                                   dot_2[9][0]                      
                                                                   lstm_2[8][0]                     
                                                                   lstm_2[8][2]                     
____________________________________________________________________________________________________
dense_6 (Dense)                  (None, 11)            715         lstm_2[0][0]                     
                                                                   lstm_2[1][0]                     
                                                                   lstm_2[2][0]                     
                                                                   lstm_2[3][0]                     
                                                                   lstm_2[4][0]                     
                                                                   lstm_2[5][0]                     
                                                                   lstm_2[6][0]                     
                                                                   lstm_2[7][0]                     
                                                                   lstm_2[8][0]                     
                                                                   lstm_2[9][0]                     
====================================================================================================
Total params: 52,960
Trainable params: 52,960
Non-trainable params: 0
____________________________________________________________________________________________________


### START CODE HERE ### (≈2 lines)
# Adam optimizer with explicit learning rate, moment coefficients and learning-rate decay.
opt = Adam(lr = 0.005, beta_1 = 0.9, beta_2 = 0.999, decay = 0.01)
# The same loss and metric are applied to each of the model's Ty outputs
# (the fit log reports dense_6_loss_1 .. dense_6_loss_10).
model.compile(optimizer = opt, loss = "categorical_crossentropy", metrics = ['accuracy'])
### END CODE HERE ###


# Zero initial hidden and cell states for the post-attention LSTM, one row per example.
s0 = np.zeros((m, n_s))
c0 = np.zeros((m, n_s))
# Yoh has shape (m, Ty, vocab); swapping the first two axes and splitting along
# the new first axis gives a list of Ty arrays of shape (m, vocab), one target
# array per model output.
outputs = list(Yoh.swapaxes(0,1))


# Train for a single epoch in mini-batches of 100; inputs are the one-hot
# sources plus the zero initial states, targets are the Ty per-step arrays.
model.fit([Xoh, s0, c0], outputs, epochs=1, batch_size=100)

Epoch 1/1
10000/10000 [==============================] - 54s - loss: 17.1519 - dense_6_loss_1: 1.3138 - dense_6_loss_2: 1.0667 - dense_6_loss_3: 1.7901 - dense_6_loss_4: 2.7081 - dense_6_loss_5: 0.8430 - dense_6_loss_6: 1.3397 - dense_6_loss_7: 2.7386 - dense_6_loss_8: 0.9896 - dense_6_loss_9: 1.7369 - dense_6_loss_10: 2.6255 - dense_6_acc_1: 0.4573 - dense_6_acc_2: 0.6558 - dense_6_acc_3: 0.2793 - dense_6_acc_4: 0.0701 - dense_6_acc_5: 0.9564 - dense_6_acc_6: 0.2614 - dense_6_acc_7: 0.0399 - dense_6_acc_8: 0.9690 - dense_6_acc_9: 0.2063 - dense_6_acc_10: 0.0897

<keras.callbacks.History at 0x7f43d3cca940>


# Overwrite the weights from the one-epoch run above with those stored in
# models/model.h5 (presumably a longer-trained checkpoint — confirm).
model.load_weights('models/model.h5')


# Free-form date strings used to spot-check the trained model.
EXAMPLES = ['3 May 1979', '5 April 09', '21th of August 2016', 'Tue 10 Jul 2007', 'Saturday May 9 2018', 'March 3 2001', 'March 3rd 2001', '1 March 2001']
for example in EXAMPLES:

    # Encode the text as vocabulary indices (string_to_int is expected to
    # return a length-Tx sequence of ints — it is not defined in this file).
    source = string_to_int(example, Tx, human_vocab)
    # One-hot encode the whole sequence in a single call -> (Tx, vocab), then add
    # an explicit batch axis -> (1, Tx, vocab). This avoids depending on the shape
    # to_categorical returns for a scalar, which differs between Keras versions.
    source = to_categorical(source, num_classes=len(human_vocab))
    source = np.expand_dims(source, axis=0)
    # Pass a single row of the zero initial states so that all three inputs
    # have the same number of samples as `source` (a batch of one).
    prediction = model.predict([source, s0[:1], c0[:1]])
    # predict() returns one array per output step; take the most likely class
    # at each step and flatten to a 1-D vector of Ty class indices.
    prediction = np.argmax(prediction, axis = -1).ravel()
    output = [inv_machine_vocab[int(i)] for i in prediction]

    print("source:", example)
    print("output:", ''.join(output),"\n")

source: 3 May 1979
output: 1979-05-03 

source: 5 April 09
output: 2009-05-05 

source: 21th of August 2016
output: 2016-08-21 

source: Tue 10 Jul 2007
output: 2007-07-10 

source: Saturday May 9 2018
output: 2018-05-09 

source: March 3 2001
output: 2001-03-03 

source: March 3rd 2001
output: 2001-03-03 

source: 1 March 2001
output: 2001-03-01


# Print the layer table again, this time for the model with the loaded weights.
model.summary()

____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
====================================================================================================
input_1 (InputLayer)             (None, 30, 37)        0                                            
____________________________________________________________________________________________________
s0 (InputLayer)                  (None, 64)            0                                            
____________________________________________________________________________________________________
bidirectional_1 (Bidirectional)  (None, 30, 64)        17920       input_1[0][0]                    
____________________________________________________________________________________________________
repeat_vector_2 (RepeatVector)   (None, 30, 64)        0           s0[0][0]                         
                                                                   lstm_2[0][0]                     
                                                                   lstm_2[1][0]                     
                                                                   lstm_2[2][0]                     
                                                                   lstm_2[3][0]                     
                                                                   lstm_2[4][0]                     
                                                                   lstm_2[5][0]                     
                                                                   lstm_2[6][0]                     
                                                                   lstm_2[7][0]                     
                                                                   lstm_2[8][0]                     
____________________________________________________________________________________________________
concatenate_2 (Concatenate)      (None, 30, 128)       0           bidirectional_1[0][0]            
                                                                   repeat_vector_2[0][0]            
                                                                   bidirectional_1[0][0]            
                                                                   repeat_vector_2[1][0]            
                                                                   bidirectional_1[0][0]            
                                                                   repeat_vector_2[2][0]            
                                                                   bidirectional_1[0][0]            
                                                                   repeat_vector_2[3][0]            
                                                                   bidirectional_1[0][0]            
                                                                   repeat_vector_2[4][0]            
                                                                   bidirectional_1[0][0]            
                                                                   repeat_vector_2[5][0]            
                                                                   bidirectional_1[0][0]            
                                                                   repeat_vector_2[6][0]            
                                                                   bidirectional_1[0][0]            
                                                                   repeat_vector_2[7][0]            
                                                                   bidirectional_1[0][0]            
                                                                   repeat_vector_2[8][0]            
                                                                   bidirectional_1[0][0]            
                                                                   repeat_vector_2[9][0]            
____________________________________________________________________________________________________
dense_4 (Dense)                  (None, 30, 10)        1290        concatenate_2[0][0]              
                                                                   concatenate_2[1][0]              
                                                                   concatenate_2[2][0]              
                                                                   concatenate_2[3][0]              
                                                                   concatenate_2[4][0]              
                                                                   concatenate_2[5][0]              
                                                                   concatenate_2[6][0]              
                                                                   concatenate_2[7][0]              
                                                                   concatenate_2[8][0]              
                                                                   concatenate_2[9][0]              
____________________________________________________________________________________________________
dense_5 (Dense)                  (None, 30, 1)         11          dense_4[0][0]                    
                                                                   dense_4[1][0]                    
                                                                   dense_4[2][0]                    
                                                                   dense_4[3][0]                    
                                                                   dense_4[4][0]                    
                                                                   dense_4[5][0]                    
                                                                   dense_4[6][0]                    
                                                                   dense_4[7][0]                    
                                                                   dense_4[8][0]                    
                                                                   dense_4[9][0]                    
____________________________________________________________________________________________________
attention_weights (Activation)   (None, 30, 1)         0           dense_5[0][0]                    
                                                                   dense_5[1][0]                    
                                                                   dense_5[2][0]                    
                                                                   dense_5[3][0]                    
                                                                   dense_5[4][0]                    
                                                                   dense_5[5][0]                    
                                                                   dense_5[6][0]                    
                                                                   dense_5[7][0]                    
                                                                   dense_5[8][0]                    
                                                                   dense_5[9][0]                    
____________________________________________________________________________________________________
dot_2 (Dot)                      (None, 1, 64)         0           attention_weights[0][0]          
                                                                   bidirectional_1[0][0]            
                                                                   attention_weights[1][0]          
                                                                   bidirectional_1[0][0]            
                                                                   attention_weights[2][0]          
                                                                   bidirectional_1[0][0]            
                                                                   attention_weights[3][0]          
                                                                   bidirectional_1[0][0]            
                                                                   attention_weights[4][0]          
                                                                   bidirectional_1[0][0]            
                                                                   attention_weights[5][0]          
                                                                   bidirectional_1[0][0]            
                                                                   attention_weights[6][0]          
                                                                   bidirectional_1[0][0]            
                                                                   attention_weights[7][0]          
                                                                   bidirectional_1[0][0]            
                                                                   attention_weights[8][0]          
                                                                   bidirectional_1[0][0]            
                                                                   attention_weights[9][0]          
                                                                   bidirectional_1[0][0]            
____________________________________________________________________________________________________
c0 (InputLayer)                  (None, 64)            0                                            
____________________________________________________________________________________________________
lstm_2 (LSTM)                    [(None, 64), (None, 6 33024       dot_2[0][0]                      
                                                                   s0[0][0]                         
                                                                   c0[0][0]                         
                                                                   dot_2[1][0]                      
                                                                   lstm_2[0][0]                     
                                                                   lstm_2[0][2]                     
                                                                   dot_2[2][0]                      
                                                                   lstm_2[1][0]                     
                                                                   lstm_2[1][2]                     
                                                                   dot_2[3][0]                      
                                                                   lstm_2[2][0]                     
                                                                   lstm_2[2][2]                     
                                                                   dot_2[4][0]                      
                                                                   lstm_2[3][0]                     
                                                                   lstm_2[3][2]                     
                                                                   dot_2[5][0]                      
                                                                   lstm_2[4][0]                     
                                                                   lstm_2[4][2]                     
                                                                   dot_2[6][0]                      
                                                                   lstm_2[5][0]                     
                                                                   lstm_2[5][2]                     
                                                                   dot_2[7][0]                      
                                                                   lstm_2[6][0]                     
                                                                   lstm_2[6][2]                     
                                                                   dot_2[8][0]                      
                                                                   lstm_2[7][0]                     
                                                                   lstm_2[7][2]                     
                                                                   dot_2[9][0]                      
                                                                   lstm_2[8][0]                     
                                                                   lstm_2[8][2]                     
____________________________________________________________________________________________________
dense_6 (Dense)                  (None, 11)            715         lstm_2[0][0]                     
                                                                   lstm_2[1][0]                     
                                                                   lstm_2[2][0]                     
                                                                   lstm_2[3][0]                     
                                                                   lstm_2[4][0]                     
                                                                   lstm_2[5][0]                     
                                                                   lstm_2[6][0]                     
                                                                   lstm_2[7][0]                     
                                                                   lstm_2[8][0]                     
                                                                   lstm_2[9][0]                     
====================================================================================================
Total params: 52,960
Trainable params: 52,960
Non-trainable params: 0
____________________________________________________________________________________________________


# Plot the attention map for a single example date string and keep the
# value returned by the helper in `attention_map`.
# NOTE(review): n_s=64 presumably matches the 64-unit s0/c0 state size shown
# in the model summary; the meaning of `num` is not visible here — confirm
# against the plot_attention_map helper before changing either value.
attention_map = plot_attention_map(
    model,
    human_vocab,
    inv_machine_vocab,
    "Tuesday 09 Oct 1993",
    num=7,
    n_s=64,
)

<matplotlib.figure.Figure at 0x7f43d3d2deb8>

Total params:	52,960
Trainable params:	52,960
Non-trainable params:	0
bidirectional_1's output shape	(None, 30, 64)
repeat_vector_1's output shape	(None, 30, 64)
concatenate_1's output shape	(None, 30, 128)
attention_weights's output shape	(None, 30, 1)
dot_1's output shape	(None, 1, 64)
dense_3's output shape	(None, 11)

Neural Machine Translation¶

Updates¶

If you were working on the notebook before this update...¶

List of updates¶

1 - Translating human readable dates into machine readable dates¶

1.1 - Dataset¶

2 - Neural machine translation with attention¶

2.1 - Attention mechanism¶

Pre-attention and Post-attention LSTMs on both sides of the attention mechanism¶

An LSTM has both a hidden state and cell state¶

Each time step does not use predictions from the previous time step¶

Concatenation of hidden states from the forward and backward pre-attention LSTMs¶

Computing "energies" $e^{\langle t, t' \rangle}$ as a function of $s^{\langle t-1 \rangle}$ and $a^{\langle t' \rangle}$¶

Implementation Details¶

one_step_attention¶

Clarifying 'context' and 'c'¶

Implement `one_step_attention`¶

model¶

Troubleshooting Note¶

Compile the model¶

Define inputs and outputs, and fit the model¶

3 - Visualizing Attention (Optional / Ungraded)¶

3.1 - Getting the attention weights from the network¶

Congratulations!¶

Here's what you should remember¶

Neural Machine Translation¶

Updates¶

If you were working on the notebook before this update...¶

List of updates¶

1 - Translating human readable dates into machine readable dates¶

1.1 - Dataset¶

2 - Neural machine translation with attention¶

2.1 - Attention mechanism¶

Pre-attention and Post-attention LSTMs on both sides of the attention mechanism¶

An LSTM has both a hidden state and cell state¶

Each time step does not use predictions from the previous time step¶

Concatenation of hidden states from the forward and backward pre-attention LSTMs¶

Computing "energies" $e^{\langle t, t' \rangle}$ as a function of $s^{\langle t-1 \rangle}$ and $a^{\langle t' \rangle}$¶

Implementation Details¶

one_step_attention¶

Clarifying 'context' and 'c'¶

Implement `one_step_attention`¶

model¶

Troubleshooting Note¶

Compile the model¶

Define inputs and outputs, and fit the model¶

3 - Visualizing Attention (Optional / Ungraded)¶

3.1 - Getting the attention weights from the network¶

Congratulations!¶

Here's what you should remember¶

Implement `one_step_attention`¶